---
title: Appendix - YouGov Survey
author: Weifang Xu, Taylor Chewning, and Qing Wang
date: September 16, 2024
output: pdf_document
fontsize: 11 pt
header-includes:
  \usepackage[T1]{fontenc}
  \usepackage[utf8]{inputenc}
  \usepackage{newpxtext,newpxmath}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r}
## Clean the working environment and set up the working directory 
rm(list = ls())
setwd("/Users/qingwang/Downloads/Data Replication")

# load the libraries
library(tidyverse)
library(kableExtra)
library(haven)
library(ggthemes)
#install the flexpath package
# devtools::install_github("dustinfife/flexplot")
library(flexplot)
library(sandwich)
library(lmtest)
library(texreg)
library(boot)
library(xtable)
library(modelsummary)
library(marginaleffects)
library(ggpubr)
library(pBrackets)
library(lemon)
library(arm)
library(gplots)
library(extrafont)
# install the compactr package
# url <- "https://cran.r-project.org/src/contrib/Archive/compactr/compactr_0.1.tar.gz"
# install.packages(url, repos = NULL, type = "source")
library(compactr)

# load the dataset
df_ces <- read_dta("YouGov/YouGov_clean.dta")
df_ces_post <- read_dta("YouGov/YouGov_post_clean.dta")

```

```{r}
#### Table S8: Sample Demographics in Comparison with Census Benchmarks (YouGov Survey) ####

# benchmark demographic data is obtained from Table S0101 of the 2021 American Community Survey
# link to the survey: https://data.census.gov/table/ACSST1Y2021.S0101?q=S0101

# calculate the demographic of the PureSpectrum sample
# sex
male_percent <- df_ces %>%
  group_by(male) %>%
  summarise(percentage = round(n() / nrow(df_ces) * 100, 1))
male_percent

# age
# age ==1, 18–29
# age ==2, 30–39
# age ==3, 40–49
# age ==4, 50–59
# age ==5, 60–69
# age ==6, 70 or above

age_percent <- df_ces %>%
  mutate(age_demo = case_when(age <= 29 ~ '1',
                              age >= 30 & age <= 39 ~ '2',
                              age >= 40 & age <= 49 ~ '3',
                              age >= 50 & age <= 59 ~ '4',
                              age >= 60 & age <= 69 ~ '5',
                              age >= 70 ~ '6')) %>%
  group_by(age_demo) %>%
  summarise(percentage = round(n() / nrow(df_ces) * 100, 1))
age_percent

# race
# race ==1, White
# race ==2, Black
# race ==3, Hispanic
# race ==4, Asian
# race ==5, Native American
# race ==6, Two or more races
# race ==7, Other
# race ==8, Middle Eastern

race_percent <- df_ces %>%
  group_by(race) %>%
  summarise(percentage = round(n() / nrow(df_ces) * 100, 1))
race_percent

# calculate the "Other" race category
race_other = race_percent[4,2] + race_percent[5,2] + race_percent[6,2] + race_percent[7,2] + race_percent[8,2]
race_other
```

```{r, out.extra='angle=90', results='asis'}
#### Table S9: Regression Estimates of Support for War (Binary Dependent Variable, YouGov Survey) ####

# convert the inc variable unit ($ to 10k$)
df_ces <- df_ces %>% 
  mutate(inc_10k = inc/10000)

# run the regression models
m1_ces <- lm(attack ~ hmrts + alliance, data = df_ces)
m2_ces <- lm(attack ~ alliance * hmrts, data = df_ces)
m3_ces <- lm(attack ~ alliance*hmrts + 
           male + age_cat + edu4 + inc_10k, data = df_ces)

texreg(l = list(m1_ces, m2_ces, m3_ces),
       reorder.coef= c(2, 3, 4, 5, 6, 7, 8, 1),
       custom.coef.names = c("(Intercept)", "Violating Human Rights", "U.S. Military Alliance",
                             "Violating Human Rights $\\times$ U.S. Military Alliance",
                             "Male", "Age", "Education", "Income"),
       stars = c(0.01, 0.05, 0.1),
       digits = 2,
       caption = "Linear Estimate of Public Approval for Attacking the Third Country (CES Survey)",
       caption.above = T,
       include.ci = F,
       include.rmse =F,
       include.rsq = F,
       include.adjrs = F,
       label = "",
       custom.note = "",
       fontsize = "small") %>%
  gsub(".begin.center.", "\\\\centering", .) %>%
  gsub(".end.center.", "", .)
```

```{r}
#### Figure S10: Impact of Treatments on Support for War (Binary Dependent Variable, YouGov Sur- vey, 95% Confidence Intervals) ####

### Plot (10a) Support for Attack
# calculate the mean support for attack in each treatment group
df_ate_ces <- df_ces %>% 
  group_by(exp_4) %>%
  summarise(ate = mean(attack, na.rm = TRUE),
            n = n(),
            se = sd(attack, na.rm = TRUE) / sqrt(n)) %>%
  mutate(ci_low = ate - 1.96*se,
         ci_high = ate + 1.96*se)

# plot the mean support for attack in each treatment group and the 95% CI
p <- ggplot(df_ate_ces, aes(x = factor(exp_4, level=c('1', '3', '2', '4')), y = ate,
                       color=factor(exp_4))) + 
  theme_classic() +
  geom_point()+
  geom_errorbar(aes(ymin=ci_low, ymax=ci_high), width=.1,
                position=position_dodge(0.05)) +
  scale_x_discrete(labels= c('Non-Ally,Violates HR', 'Ally,Violates HR',
                             'Non-Ally,Protects HR', "Ally,Protects HR")) +
  scale_color_manual(values=c('#999999', 'black', 'black', 'black')) +
  theme(legend.position = "none") +
  labs(x = "", y = "Mean Support for Attack \n (% point)", size = 12) +
  geom_text(aes(label=round(ate, 1)), position=position_dodge(width=0.9), 
            vjust=.5, hjust = -.35) +
  theme(axis.text.x = element_text(angle = 20, hjust = 0.5, vjust = 0.5, size = 12),
        axis.title.y = element_text(size=12))
p
# ggsave("ate-ces.pdf", width = 6, height = 4)

### Plot (10b) Average Treatment Effect on Support for Attack
# calculate the difference in support for attack between 2-4 against baseline condition
est <- rep(NA, 4)
ci_low <- rep(NA, 4)
ci_high <- rep(NA, 4)
se <- rep(NA, 4)

for(i in 2:4){
  test <- t.test(df_ces$attack[df_ces$exp_4==i], 
                 df_ces$attack[df_ces$exp_4==1])
  est[i] <- test[["estimate"]][["mean of x"]] - test[["estimate"]][["mean of y"]]
  ci_low[i] <- test[["conf.int"]][1]
  ci_high[i] <- test[["conf.int"]][2]
  se[i] <- test[["stderr"]]
}

df_ate_diff_ces <- data.frame(exp_4 = df_ate_ces$exp_4, est, ci_low, ci_high, se)
df_ate_diff_ces <- df_ate_diff_ces[-1, ]

# plot the differences and 95% CI
p1 <- ggplot(df_ate_diff_ces, aes(x = factor(exp_4, level=c('3', '2', '4')), y = est)) + 
  theme_classic() +
  geom_point()+
  geom_errorbar(aes(ymin=ci_low, ymax=ci_high), width=.1,
                position=position_dodge(0.05)) +
  scale_x_discrete(labels= c('Ally,Violates HR', 'Non-Ally,Protects HR', 
                             'Ally,Protects HR')) +
  labs(x = "", y = "Average Treatment Effect \n (% point)", size = 12) +
  geom_text(aes(label=round(est, 1)), position=position_dodge(width=0.9), 
            vjust=.5, hjust = -.35) +
  geom_hline(yintercept = 0, linetype="dotted") +
  theme(axis.text.x = element_text(angle = 20, hjust = 0.5, vjust = 0.5, size = 12),
        axis.title.y = element_text(size=12))
p1
# ggsave("ate-diff-ces.pdf", width = 6, height = 4)
```

```{r}
#### Table S11: Means of Public Support for War (Binary Dependent Variable, YouGov survey) ####
# group_mean of pre-election survey
mean_pre <- df_ces %>% 
  group_by(exp_4) %>%
  filter(complete.cases(attack)) %>%
  summarise(mean = mean(attack, na.rm = TRUE),
            n = n())
mean_pre

# group mean of post-election survey
mean_post <- df_ces_post %>% 
  group_by(exp_4) %>%
  filter(complete.cases(attack_post)) %>%
  summarise(mean = mean(attack_post, na.rm = TRUE),
            n = n())
mean_post

# difference of means
mean_post$mean - mean_pre$mean

# t.test by treatment group to obtain p-values
lapply(split(df_ces_post, factor(df_ces_post$exp_4)), 
       function(x)t.test(data=x, attack_post ~ attack))
```


